
#Load all required libraries here:----
library(readr)
library(dplyr)
library(ggplot2)


# Load the three input tables ----
# All CSV files are read from a single folder. Change `data_dir` to run the
# script from another location instead of editing every path.
data_dir <- "C:/Users/User/OneDrive/Desktop/BA assignment"

# Customer table
customers_data <- read_csv(file.path(data_dir, "customers_data.csv"))

# Product table
products_data <- read_csv(file.path(data_dir, "products_data.csv"))

# Transaction table
transactions_data <- read_csv(file.path(data_dir, "transactions_data.csv"))


# Inspect the customer table as it was read from disk.
glimpse(customers_data)  # Rows:2500, Columns:11

# Coerce customer columns to their analysis types:
#   Customer_ID      -> character (an identifier, not a quantity)
#   Age              -> integer
#   Gender           -> factor
#   Year, Month, Day -> integer
# Columns are converted in place, so the column order is unchanged.
customers_data <- mutate(
  customers_data,
  Customer_ID = as.character(Customer_ID),
  Age = as.integer(Age),
  Gender = as.factor(Gender),
  across(c(Year, Month, Day), as.integer)
)

# Re-inspect to confirm the new column types.
glimpse(customers_data)  # Rows:2500, Columns:11

#View(customers_data)

# Inspect the product table as it was read from disk.
glimpse(products_data)   # Rows:100, Columns:6

# Coerce product columns to their analysis types:
#   Product_ID         -> character (an identifier, not a quantity)
#   Category, Supplier -> factor
#   Discontinued       -> logical, TRUE exactly where the value is "Yes"
products_data <- mutate(
  products_data,
  Product_ID = as.character(Product_ID),
  across(c(Category, Supplier), as.factor),
  Discontinued = as.logical(Discontinued == "Yes")
)

# Re-inspect to confirm the new column types.
glimpse(products_data)

#View(products_data)

# Inspect the transaction table as it was read from disk.
glimpse(transactions_data)  # Rows:15033, Columns:11

# Coerce transaction columns to their analysis types:
#   Transaction_ID, Customer_ID, Product_ID -> character (identifiers)
#   Year, Month, Day, Quantity              -> integer
#   Store_Location                          -> factor
# Columns are converted in place, so the column order is unchanged.
transactions_data <- mutate(
  transactions_data,
  across(c(Transaction_ID, Customer_ID, Product_ID), as.character),
  across(c(Year, Month, Day, Quantity), as.integer),
  Store_Location = as.factor(Store_Location)
)
#View(transactions_data)



# transactions_2023: the subset of transactions whose Year is 2023.
transactions_2023 <- filter(transactions_data, Year == 2023)

# Show the first 10 rows of transactions_2023.
print(transactions_2023, n = 10)


# customer_summary: one row per Customer_ID with
#   Total_Transaction_count - number of transaction rows for the customer
#   Total_Amount_spent      - sum of Total_Amount, ignoring missing values
customer_summary <- transactions_data %>%
  group_by(Customer_ID) %>%
  summarise(
    Total_Transaction_count = n(),
    Total_Amount_spent = sum(Total_Amount, na.rm = TRUE)
  )

# Preview the first 10 customers.
print(head(customer_summary, 10))
#View(customer_summary)


# premium_customers: customer details for everyone whose total spend is at
# least 10,000, sorted from the lowest to the highest total.
premium_customers <- customers_data %>%
  # Attach each customer's totals; customers without transactions drop out.
  inner_join(customer_summary, by = "Customer_ID") %>%
  # Expose the spend total under the name Total_Transaction ...
  rename(Total_Transaction = Total_Amount_spent) %>%
  # ... and discard the transaction count, which is not needed here.
  select(-Total_Transaction_count) %>%
  filter(Total_Transaction >= 10000) %>%
  arrange(Total_Transaction)
#View(premium_customers)



# Bar plot: number of transactions per product category.
product_transaction <- transactions_data %>%
  # Bring in Category for every transaction via its Product_ID.
  inner_join(products_data, by = "Product_ID") %>%
  group_by(Category) %>%
  summarise(Transaction_count = n()) %>%
  ggplot() +
  # The counts are already computed, so bar heights are taken as-is.
  geom_col(mapping = aes(x = Category, y = Transaction_count))

# Render the plot.
product_transaction



# discounted_transactions_data: transactions plus a Discounted_Amount column.
# Amounts strictly above 100 are reduced by 10%; all others are left as-is.
discounted_transactions_data <- mutate(
  transactions_data,
  Discounted_Amount = if_else(
    Total_Amount > 100,
    Total_Amount * 0.9,
    Total_Amount
  )
)

# Per-customer summary within each Location: number of transactions and total
# amount spent, sorted by transaction count (highest first).
# NOTE: the name `location_summary` is reassigned further down the script by
# the per-location summary, so this result is only available until then.
location_summary <- transactions_data %>%
  # Attach customer attributes (including Location) to every transaction.
  inner_join(customers_data, by = "Customer_ID") %>%
  group_by(Location, Customer_ID) %>%
  summarise(
    total_number_of_transactions = n(),
    # na.rm = TRUE keeps one missing amount from turning a customer's whole
    # total into NA, matching how customer_summary computes its total.
    Total_Amount_Spent = sum(Total_Amount, na.rm = TRUE)
  ) %>%
  arrange(desc(total_number_of_transactions))
print(location_summary)

# Per-location summary: number of transactions and total amount spent for each
# Location, sorted by transaction count (highest first).
# NOTE: this reassigns `location_summary`, replacing any earlier value.

location_summary <- transactions_data %>%
  # Attach customer attributes (including Location) to every transaction.
  inner_join(customers_data, by = "Customer_ID") %>%
  group_by(Location) %>%
  summarise(
    total_number_of_transactions = n(),
    # na.rm = TRUE keeps one missing amount from turning a location's whole
    # total into NA, matching how customer_summary computes its total.
    Total_Amount_Spent = sum(Total_Amount, na.rm = TRUE)
  ) %>%
  arrange(desc(total_number_of_transactions))
print(location_summary)

# frequent_customers: IDs of customers who made at least 3 purchases in at
# least one month of 2023 (each ID listed once).
frequent_customers <- transactions_2023 %>%
  group_by(Customer_ID, Month) %>%
  # One row per customer-month holding that month's purchase count.
  summarise(Purchase_Count = n()) %>%
  filter(Purchase_Count >= 3) %>%
  # A customer can qualify in several months; keep each ID only once.
  distinct(Customer_ID)
frequent_customers
#View(frequent_customers)


# frequent_customers (reassigned): customers who made at least 3 purchases in
# every one of the 12 months of 2023.
frequent_customers <- transactions_2023 %>%
  # One row per customer-month holding that month's purchase count.
  count(Customer_ID, Month, name = "Purchase_Count") %>%
  filter(Purchase_Count >= 3) %>%
  group_by(Customer_ID) %>%
  # Number of different months in which the customer met the threshold.
  summarise(Distinct_Months = n_distinct(Month)) %>%
  filter(Distinct_Months == 12)
frequent_customers
  

# Category sale ----
# Scatter plot of Total_Amount against Quantity for transactions in the
# Electronics category, with a smoothed trend line; points and trend lines are
# coloured by Store_Location.
quantity_transaction <- transactions_data %>%
  # Bring in Category for every transaction via its Product_ID. The join has
  # to be piped straight into filter(): without the pipe, filter() runs with
  # no data frame and fails with "object 'Category' not found".
  inner_join(products_data, by = "Product_ID") %>%
  filter(Category == "Electronics") %>%
  # Both layers use the same aesthetics, so they are declared once here.
  ggplot(mapping = aes(x = Quantity, y = Total_Amount, color = Store_Location)) +
  geom_point() +
  geom_smooth() +
  ggtitle("Total Amount vs.Quantity for transactions in the Electronics category") +
  labs(x = "Quantity for transactions", y = "Total_Amount")

# Render the plot.
quantity_transaction


    







